#  File src/library/tools/R/read.00Index.R
#  Part of the R package, https://www.R-project.org
#
#  Copyright (C) 1995-2012 The R Core Team
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  A copy of the GNU General Public License is available at
#  https://www.R-project.org/Licenses/

read.00Index <-
function(file)
{
    if(is.character(file)) {
        if(file == "") file <- stdin()
        else {
            file <- file(file, "r")
            on.exit(close(file))
        }
    }
    if(!inherits(file, "connection"))
        stop(gettextf("argument '%s' must be a character string or connection",
                      file),
             domain = NA)

    y <- matrix("", nrow = 0L, ncol = 2L)
    x <- paste(readLines(file), collapse = "\n")

    ## <FIXME>
    ## We cannot necessarily assume that the 00Index-style file to be
    ## read in was generated by @code{Rdindex()} or by R using
    ## formatDL(style = "table").  In particular, some packages have
    ## 00Index files with (section) headers and footers in addition to
    ## the data base chunks which are description lists rendered in
    ## tabular form.  Hence, we need some heuristic for identifying the
    ## db chunks.  Easy to the human eye (is there a column for aligning
    ## entries?) but far from trivial ... as a first approximation we
    ## try to consider chunks containing at least one tab or three
    ## spaces a db chunk.  (A better heuristic would be the following:
    ## entries rendered in one line have item and description separated
    ## by at least 3 spaces or tabs; entries with a line break have
    ## continuation lines starting with whitespace (no test whether for
    ## alignment).  If a chunk is made of such entries only it is
    ## considered a db chunk.  But not all current packages follow this
    ## scheme.  Argh.)
    ## Clearly we need to move to something better in future versions.
    ## </FIXME>

    ## First split into paragraph chunks separated by whitespace-only
    ## lines.
    for(chunk in unlist(strsplit(x, "\n[ \t\n]*\n"))) {
        entries <- tryCatch({
            if(!grepl("(   |\t)", chunk))
                NULL
            else {
                ## Combine entries with continuation lines.
                chunk <- gsub("\n[ \t]+", "\t", chunk)
                ## Split into lines and then according to whitespace.
                x <- strsplit(unlist(strsplit(chunk, "\n")), "[ \t]")
                cbind(unlist(lapply(x, "[[", 1L)),
                      unlist(lapply(x, function(t) {
                          paste(t[-c(1L, which(!nzchar(t)))],
                                collapse = " ")
                      })))
            }
        },
                            error = identity)
        if(!inherits(entries, "error") && NCOL(entries) == 2L)
            y <- rbind(y, entries)
    }
    colnames(y) <- c("Item", "Description")
    y
}
